From 023e3bc72dc37d41f952e1c721c6ab4bf9370015 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Fri, 25 Sep 2009 10:50:18 +0100 Subject: [PATCH] x86: Make assigned devices' interrupts delivery to right vcpu. This patch aims to reduce IPIs when delivering interrupts from VT-d assigned devices to their target vcpus. In experiments with a 10G Oplin NIC, CPU utilization drops by 5%-6% while the NIC's bandwidth remains unchanged. The patch always benefits UP guests with MSI-capable devices assigned, and SMP guests whose lapic destination mode is physical mode. It also benefits SMP guests whose lapic dest_mode is logical mode when only one destination is specified. So it should cover the major cases in real environments. Currently, this patch intercepts the programming of the MSI interrupt status and calculates the destination id for the pirq in advance, at programming time. When vcpu migration occurs or the guest re-programs the MSI status, it checks whether the affinity of the corresponding assigned-device pirq needs to be set, and makes the vcpu's affinity and the pirq's affinity consistent, to eventually reduce the IPIs. 
Signed-off-by : Xiantao Zhang Signed-off-by: Xiaohui Xin --- xen/arch/x86/hvm/hvm.c | 29 +++++++++++++++++++++++++++++ xen/arch/x86/hvm/vmsi.c | 23 +++++++++++++++++++++++ xen/arch/x86/hvm/vmx/vmcs.c | 1 + xen/arch/x86/irq.c | 4 ++-- xen/drivers/passthrough/io.c | 16 ++++++++++++++-- xen/include/asm-x86/hvm/hvm.h | 3 +++ xen/include/asm-x86/irq.h | 2 ++ xen/include/xen/hvm/irq.h | 1 + 8 files changed, 75 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 81d133ff8b..7748b4579e 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -211,6 +211,35 @@ void hvm_migrate_timers(struct vcpu *v) pt_migrate(v); } +void hvm_migrate_pirqs(struct vcpu *v) +{ + int pirq, irq; + struct irq_desc *desc; + struct domain *d = v->domain; + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + + if ( !iommu_enabled || (hvm_irq_dpci == NULL) ) + return; + + spin_lock(&d->event_lock); + for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs); + pirq < d->nr_pirqs; + pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) ) + { + if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) || + (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) ) + continue; + desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL); + if (!desc) + continue; + irq = desc - irq_desc; + ASSERT(MSI_IRQ(irq)); + desc->handler->set_affinity(irq, *cpumask_of(v->processor)); + spin_unlock_irq(&desc->lock); + } + spin_unlock(&d->event_lock); +} + void hvm_do_resume(struct vcpu *v) { ioreq_t *p; diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c index 3662a4c757..e312c442d7 100644 --- a/xen/arch/x86/hvm/vmsi.c +++ b/xen/arch/x86/hvm/vmsi.c @@ -124,6 +124,29 @@ int vmsi_deliver(struct domain *d, int pirq) return 1; } +/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */ +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode) +{ + int dest_vcpu_id = -1, w = 0; + 
struct vcpu *v; + + if ( d->max_vcpus == 1 ) + return 0; + + for_each_vcpu ( d, v ) + { + if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) + { + w++; + dest_vcpu_id = v->vcpu_id; + } + } + if ( w > 1 ) + return -1; + + return dest_vcpu_id; +} + /* MSI-X mask bit hypervisor interception */ struct msixtbl_entry { diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 7d3ea0b1aa..11dc468521 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -940,6 +940,7 @@ void vmx_do_resume(struct vcpu *v) vmx_clear_vmcs(v); vmx_load_vmcs(v); hvm_migrate_timers(v); + hvm_migrate_pirqs(v); vmx_set_host_env(v); vpid_sync_vcpu_all(v); } diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 620d6b3f5a..b780d03ed8 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -1517,7 +1517,7 @@ static void dump_irqs(unsigned char key) /* Only show CPU0 - CPU31's affinity info.*/ printk(" IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s" " status=%08x mapped, unbound\n", - irq, *(int*)cfg->domain.bits, cfg->vector, + irq, *(int*)desc->affinity.bits, cfg->vector, desc->handler->typename, desc->status); else { @@ -1525,7 +1525,7 @@ static void dump_irqs(unsigned char key) printk(" IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s " "status=%08x in-flight=%d domain-list=", - irq, *(int*)cfg->domain.bits, cfg->vector, + irq, *(int*)desc->affinity.bits, cfg->vector, desc->handler->typename, desc->status, action->in_flight); for ( i = 0; i < action->nr_guests; i++ ) diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index 198c08962d..052ab8d135 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -139,8 +139,10 @@ int pt_irq_create_bind_vtd( bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs); memset(hvm_irq_dpci->hvm_timer, 0, nr_irqs * sizeof(*hvm_irq_dpci->hvm_timer)); - for ( int i = 0; i < d->nr_pirqs; i++ ) + for ( int i = 0; i < d->nr_pirqs; i++ ) { 
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list); + hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1; + } for ( int i = 0; i < NR_HVM_IRQS; i++ ) INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]); @@ -154,6 +156,8 @@ int pt_irq_create_bind_vtd( if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI ) { + uint8_t dest, dest_mode; + int dest_vcpu_id; if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping)) { @@ -195,6 +199,14 @@ int pt_irq_create_bind_vtd( hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec; hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags; } + /* Caculate dest_vcpu_id for MSI-type pirq migration */ + dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK; + dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK); + dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode); + hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id; + spin_unlock(&d->event_lock); + if ( dest_vcpu_id >= 0 ) + hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]); } else { @@ -278,8 +290,8 @@ int pt_irq_create_bind_vtd( gdprintk(XENLOG_INFO VTDPREFIX, "VT-d irq bind: m_irq = %x device = %x intx = %x\n", machine_gsi, device, intx); + spin_unlock(&d->event_lock); } - spin_unlock(&d->event_lock); return 0; } diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 1a905272b0..76e06d7db6 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -157,6 +157,8 @@ void hvm_init_guest_time(struct domain *d); void hvm_set_guest_time(struct vcpu *v, u64 guest_time); u64 hvm_get_guest_time(struct vcpu *v); +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode); + #define hvm_paging_enabled(v) \ (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG)) #define hvm_wp_enabled(v) \ @@ -230,6 +232,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); void hvm_migrate_timers(struct vcpu *v); void hvm_do_resume(struct vcpu *v); +void 
hvm_migrate_pirqs(struct vcpu *v); static inline void hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2) diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h index 15fd2588f8..7a1c0e9d9b 100644 --- a/xen/include/asm-x86/irq.h +++ b/xen/include/asm-x86/irq.h @@ -112,6 +112,8 @@ void destroy_irq(unsigned int irq); struct irq_desc; extern void irq_complete_move(struct irq_desc **descp); +extern struct irq_desc *irq_desc; + void lock_vector_lock(void); void unlock_vector_lock(void); diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h index af298e5fe3..a4cd6fc337 100644 --- a/xen/include/xen/hvm/irq.h +++ b/xen/include/xen/hvm/irq.h @@ -50,6 +50,7 @@ struct dev_intx_gsi_link { struct hvm_gmsi_info { uint32_t gvec; uint32_t gflags; + int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */ }; struct hvm_mirq_dpci_mapping { -- 2.30.2